
Using Advanced Search to Bulk Export

Sample Script

The following Python script illustrates how to execute a search that spans multiple pages and save the results to a csv file; a short note on running it follows the listing.


"""
Example script to run search profile (Assetic.SearchGetAPI.py)
Search is paginated so set page size. On a per page basis the search
results omit columns where all records are null for that column. This is
catered for by building a list of columns returned by all pages
"""
import assetic
import csv
import functools
import sys
# Assetic SDK instance
asseticsdk = assetic.AsseticSDK("c:/users/you/assetic.ini", None, "Info")
# Search API
sapi = assetic.SearchApi()
# Define search parameters
searchguid = "f9149c1b-0a43-e611-945f-06edd62954d7"
numpagesize = 50
# Return only those records where Asset Class = "Transport"
searchfilter = "ComplexAssetClass=Transport"
# set search criteria as keyword args
kw = {"request_params_id": searchguid,
      "request_params_page": 1,
      "request_params_sorts": "ComplexAssetName-desc",
      "request_params_page_size": numpagesize,
      "request_params_filters": searchfilter}
# Get first page of results
try:
    sg = sapi.search_get(**kw)
except assetic.rest.ApiException as e:
    asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
        e.status, e.reason, e.body))
    # cannot continue without the first page
    sys.exit(1)
# Log the number of records
totalresults = sg.get("TotalResults")
numpages = sg.get("TotalPages")
asseticsdk.logger.info("Total Results: {0}, Total Pages: {1}".format(
    totalresults, numpages))
# get data from nested output
resourcelist = sg.get("ResourceList")
resource = resourcelist[0]
data = resource.get("Data")
# Copy as "alldata" because we will be appending to this
alldata = data
# Get a list of columns
columns = map(lambda x: x.keys(), data)
if sys.version_info < (3, 0):
    columns = reduce(lambda x, y: x + y, columns)
else:
    columns = functools.reduce(lambda x, y: x | y, columns)
# Write list of columns to the "all" list as this may grow
allcolumns = columns
# Now loop through remaining pages
if numpages > 1:
    for pagenum in range(2, int(numpages) + 1):
        # set page number to get
        kw["request_params_page"] = pagenum
        asseticsdk.logger.info("Page: {0}".format(kw["request_params_page"]))
        # Now get results for this page
        try:
            sg = sapi.search_get(**kw)
        except assetic.rest.ApiException as e:
            asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
                e.status, e.reason, e.body))
            # stop paging on error
            break
        # get actual data from nested output
        resourcelist = sg.get("ResourceList")
        resource = resourcelist[0]
        data = resource.get("Data")
        # Get column list for this page - there may be new columns
        columns = map(lambda x: x.keys(), data)
        if sys.version_info < (3, 0):
            columns = reduce(lambda x, y: x + y, columns)
            # Add new column list to "allcolumns", will get unique list later
            allcolumns = allcolumns + columns
            # append new data to "alldata"
            alldata = alldata + data
        else:
            columns = functools.reduce(lambda x, y: x | y, columns)
            # merge column list sets
            allcolumns = allcolumns | columns
            # append new data to "alldata"
            alldata.extend(data)
        if pagenum > 10:
            # catchall escape
            break
if sys.version_info < (3, 0):
    # get unique list of columns
    columns = list(set(allcolumns))
else:
    columns = allcolumns
# create csv
if sys.version_info < (3, 0):
    with open("c:/temp/road_dump.csv", "wb") as out_file:
        csv_w = csv.writer(out_file)
        csv_w.writerow(columns)
        for i_r in alldata:
            # map data to column list by key to avoid potential issues
            # with column order
            csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
else:
    with open("c:/temp/road_dump3.csv", "w", newline="") as out_file:
        csv_w = csv.writer(out_file)
        csv_w.writerow(columns)
        for i_r in alldata:
            # map data to column list by key to avoid potential issues
            # with column order
            csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
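Assuming the assetic package is installed, the ini file referenced at the top of the script exists and the c:/temp folder is writable, the script (named Assetic.SearchGetAPI.py in its docstring) can be run directly; the results are written to c:/temp/road_dump.csv (or road_dump3.csv under Python 3).

python Assetic.SearchGetAPI.py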

How it works

The search parameters are defined first. The searchguid is the unique ID of the search profile to be exported (refer to this article). A filter on Asset Class restricts the search results to only those records where the Asset Class is "Transport".

# Define search parameters
searchguid = "f9149c1b-0a43-e611-945f-06edd62954d7"
numpagesize = 50
# Return only those records where Asset Class = "Transport"
searchfilter = "ComplexAssetClass=Transport"

The parameter "request_params_page" is set to 1 so that the first page of results is returned. The number of records per page is set by "request_params_page_size". The results are sorted by Asset Name in descending order via "request_params_sorts", and filtered via "request_params_filters".

# set search criteria as keyword args
kw = {"request_params_id": searchguid,
      "request_params_page": 1,
      "request_params_sorts": "ComplexAssetName-desc",
      "request_params_page_size": numpagesize,
      "request_params_filters": searchfilter}


The search is executed and the array of data is obtained from the nested response.

# Get first page of results
sg = sapi.search_get(**kw)
# get actual data from nested output
resourcelist = sg.get('ResourceList')
resource = resourcelist[0]
data = resource.get('Data')
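Based on the keys accessed above, the response can be pictured roughly as the nested structure below. This is an inferred, illustrative shape only; the field names inside Data depend entirely on the search profile.

# Assumed (illustrative) shape of the search response, inferred from the
# keys used in the script - actual columns depend on the search profile
sg = {
    "TotalResults": 120,    # total records matching the search
    "TotalPages": 3,        # total pages at the requested page size
    "ResourceList": [
        {
            "Data": [
                {"ComplexAssetName": "Main St", "ComplexAssetClass": "Transport"},
                {"ComplexAssetName": "High St", "ComplexAssetClass": "Transport"},
            ]
        }
    ]
}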

Each page in subsequent requests may return a different number of fields. The following code block manages this by building a running list of columns and holding an array of data records (field name and value pairs).

# Copy as alldata because we will be appending to this
alldata = data
# Get a list of columns
columns = map(lambda x: x.keys(), data)
columns = functools.reduce(lambda x, y: x | y, columns)
# Write list of columns to the 'all' list as this may grow
allcolumns = columns
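To see what the reduce step does, consider two hypothetical records that expose different fields; the union of their key views gives the combined column list (Python 3, field names made up for illustration):

import functools

# Two hypothetical records with differing fields (names are illustrative only)
rec1 = {"ComplexAssetName": "Main St", "ComplexAssetClass": "Transport"}
rec2 = {"ComplexAssetName": "High St", "Suburb": "Springfield"}
# Union of the key views gives the combined column list
merged = functools.reduce(lambda x, y: x | y, map(lambda r: r.keys(), [rec1, rec2]))
# merged -> {"ComplexAssetName", "ComplexAssetClass", "Suburb"}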


Subsequent pages are then requested in a loop; the total number of pages returned by the first request determines the number of iterations.

for pagenum in range(2, int(numpages) + 1):
    # set page number to get
    kw['request_params_page'] = pagenum
    asseticsdk.logger.info('Page: {0}'.format(kw['request_params_page']))
    # Now get results for this page
    try:
        sg = sapi.search_get(**kw)
    except assetic.rest.ApiException as e:
        asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
            e.status, e.reason, e.body))
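The page count comes straight from the TotalPages field of the first response. If only the record count were available, the same number could be derived from the page size, for example:

import math

# Illustrative values: 120 matching records at 50 records per page
totalresults = 120
numpagesize = 50
numpages = int(math.ceil(totalresults / float(numpagesize)))  # -> 3 pages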


The data array for each page is appended to the "alldata" array, and the column list "allcolumns" is merged with the columns found in that page.

# get actual data from nested output
resourcelist = sg.get('ResourceList')
resource = resourcelist[0]
data = resource.get('Data')
# append new data to 'alldata'
alldata = alldata + data
# Get column list for this page - there may be new columns
columns = map(lambda x: x.keys(), data)
columns = functools.reduce(lambda x, y: x | y, columns)
# merge column list sets
allcolumns = allcolumns | columns

Using the unique list of columns and the key/value pairs in the data array, the csv file is created. Mapping each record to the column list by key ensures the data is written under the correct headings; because pages can return different numbers of fields, simply writing the data to file page by page would put values in the wrong columns.

# create csv
with open('c:/temp/road_dump.csv', 'w', newline='') as out_file:
    csv_w = csv.writer(out_file)
    csv_w.writerow(columns)
    for i_r in alldata:
        # map data to column list by key to avoid potential issues
        # with column order
        csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
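As a design note, the same key-based mapping can be achieved with the standard library's csv.DictWriter, which fills in missing fields automatically. A minimal sketch, assuming alldata and columns have been built as above:

import csv

# Alternative sketch: DictWriter maps each record to the header by key
# and writes "" for any column missing from that record (restval)
with open("c:/temp/road_dump.csv", "w", newline="") as out_file:
    csv_w = csv.DictWriter(out_file, fieldnames=list(columns), restval="")
    csv_w.writeheader()
    csv_w.writerows(alldata)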